package org.systemsbiology.jrap.stax; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamReader; import java.util.Iterator; import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.StringReader; import java.io.IOException; /** * Created by IntelliJ IDEA. * User: tholzman * Date: Nov 16, 2009 * Time: 3:30:12 PM * To change this template use File | Settings | File Templates. */ //Iterates through long strings within a file that begin with one //pattern and end with another. Useful for cutting out "<scan>...</scan>" //etc. public class EndPatternStringIterator implements Iterator { private static XMLInputFactory inputfactory = XMLInputFactory.newInstance(); private LineIterator li; public LineIterator getLi() { return li; } public void setLi(LineIterator li) { this.li = li; } Pattern leftPat = null; Pattern rightPat = null; private String leftPatStr; public String getLeftPatStr() { return leftPatStr; } public void setLeftPatStr(String leftPatStr) { this.leftPatStr = leftPatStr; this.leftPat = Pattern.compile(leftPatStr); } private String rightPatStr; public String getRightPatStr() { return rightPatStr; } public void setRightPatStr(String rightPatStr) { this.rightPatStr = rightPatStr; this.rightPat = Pattern.compile(rightPatStr); } private long filePos = 0; public long getFilePos() { return filePos; } private int firstLineNo = 0; public int getFirstLineNo() { return this.firstLineNo; } public EndPatternStringIterator(String leftPat, String rightPat, LineIterator li) { setLi(li); setLeftPatStr(leftPat); setRightPatStr(rightPat); } public EndPatternStringIterator(String leftPat, String rightPat, String path) throws IOException { setLi(new LineIterator(new ByteBufferIterator(path))); setLeftPatStr(leftPat); setRightPatStr(rightPat); } StringBuilder curBuf = new StringBuilder(); StringBuilder curLine = null; boolean noMore = false; public boolean hasNext() { return !noMore; } public StringBuilder next() { curBuf.setLength(0); //look for left pattern; //any part of a line left from last time? for(;;) { if(curLine == null || curLine.length() == 0) { if(li.hasNext()) { curLine = li.next(); } else { noMore = true; return curBuf; } } Matcher leftMatch = leftPat.matcher(curLine); if(!leftMatch.find()) { curLine.setLength(0); continue; } //set filepos and start concatenating int leftStartIndex = leftMatch.start(); int leftEndIndex = leftMatch.end(); filePos = li.getFilePos()+leftStartIndex; firstLineNo = li.getLineNum(); curBuf.append(curLine.subSequence(leftStartIndex,leftEndIndex)); //trim left moiety of curLine curLine.delete(0,leftEndIndex); break; } //find right pattern. If there's an EOF, well, return the current working buffer. for(;;) { if(curLine.length() == 0) { curBuf.append(' '); if(li.hasNext()) { curLine = li.next(); } else { noMore = true; return curBuf; } } Matcher rightMatch = rightPat.matcher(curLine); if(!rightMatch.find()) { curBuf.append(curLine); curLine.setLength(0); } else { int rightEndIndex = rightMatch.end(); curBuf.append(curLine.subSequence(0,rightEndIndex)); curLine.delete(0,rightEndIndex); break; } } return curBuf; } public XMLStreamReader xmlsrNext() throws IOException { StringBuilder cursb = next(); XMLStreamReader retVal = null; try { retVal = inputfactory.createXMLStreamReader(new StringBuilderReader(cursb)); } catch (Exception e) { throw new IOException(e); } return retVal; } public XMLStreamReader xmlsrCur() throws Exception { //return inputfactory.createXMLStreamReader(new StringReader(new String(curBuf))); return inputfactory.createXMLStreamReader(new StringBuilderReader(curBuf)); } public void remove() {} public static void main(String argv[]) { try { EndPatternStringIterator epsi = new EndPatternStringIterator(argv[0],argv[1],new LineIterator(new ByteBufferIterator(argv[2]))); while(epsi.hasNext()) { StringBuilder sb = epsi.next(); int lineno = epsi.getFirstLineNo(); long filePos = epsi.getFilePos(); System.out.println("Line: "+lineno+" filePos: "+ filePos+" "+sb); } } catch (Exception e) { System.err.println(e); e.printStackTrace(); } } }